#!/usr/bin/env bash
# Generate a wordpiece (BPE) model with SentencePiece's spm_train.
#
# Usage: <this script> <text_in> <num_wpm> <output_dir>
#   text_in     Input text file. The first space-delimited field of every
#               line (the sentence id) is dropped before training.
#   num_wpm     Vocabulary size passed to spm_train as --vocab_size.
#   output_dir  Directory for the results (created if missing). spm_train is
#               given the model prefix <output_dir>/bpe.<num_wpm>.
#
# Exits with status 1 if the arguments are missing or any step fails.

if [ "$#" -lt 3 ]; then
  echo "Usage: $0 <text_in> <num_wpm> <output_dir>" >&2
  exit 1
fi

text_in=$1
num_wpm=$2
output_dir=$3

mkdir -p -- "$output_dir" || exit 1

# Use a uniquely named temp file next to the input instead of a fixed
# "<text_in>.tmp": this avoids clobbering an existing file and lets several
# runs on the same text (e.g. different vocab sizes) proceed concurrently.
tmp_text=$(mktemp -- "${text_in}.tmp.XXXXXX") || exit 1

# Remove the temp file on every exit path, including failures and signals.
trap 'rm -f -- "$tmp_text"' EXIT
trap 'exit 1' HUP INT TERM

# Strip the leading sentence id; abort rather than train on a partial file.
cut -d" " -f 2- -- "$text_in" > "$tmp_text" || exit 1
echo 'Remove the sentence id from the text file!'

spm_train \
  --input="$tmp_text" \
  --model_prefix="$output_dir/bpe.${num_wpm}" \
  --vocab_size="$num_wpm" \
  --character_coverage=1.0 \
  --model_type=bpe || exit 1

rm -- "$tmp_text" || exit 1
# The file is already gone, so the EXIT cleanup is no longer needed.
trap - EXIT
echo 'Delete the tmp files!'
echo 'Done'